Here, we look at concordance between repeated measurements of a test, i.e. replicate values taken from the same blood sample. We look at Euroimmun IgA and IgG, and Roche IgG measurements. To ensure sample independence, we consider only one blood sample per participant.
EI-S1-IgA and -IgG measurements
Prepare data:
# Rows are samples, columns repetitions (max 3)
# Interesting columns: eur_quotient_i_{IgA,IgG}
mult_euro = single_data[,grep("eur", colnames(single_data))]
# Values data frame
mult_euro_val = mult_euro[,grep("eur_quotient", colnames(mult_euro))]
## Prettify column names to "Ig{A,G} {1,2,3}"
colnames(mult_euro_val) = paste(
substr(colnames(mult_euro_val), 16, 18),
substr(colnames(mult_euro_val), 14, 14), sep=" ")
# How many values are there for the different replicates?
for (col in colnames(mult_euro_val)) {
cat(col, sum(!is.na(mult_euro_val[,col])), "\n")
}
## IgA 1 6657
## IgA 2 198
## IgA 3 8
## IgG 1 6658
## IgG 2 133
## IgG 3 4
# Too few cases of replicate 3 -> disregard
mult_euro_val = mult_euro_val[,!grepl("3", colnames(mult_euro_val))]
# Tidy up
rm(col)
Scatter plots:
# Scatter plots
plt_iga = scatter(mult_euro_val, "IgA 1", "IgA 2",
"Replicate 1", "Replicate 2",
cutoff1=cutoff$old$Eur_IgA, cutoff2=cutoff$old$Eur_IgA,
stat_x=-1.5, stat_y=0.6,
ann_x=c(0.5, 2, 0.5, 2), ann_y=c(0.08, 0.08, 10, 10),
title="EI-S1-IgA replicates")
plt_igg = scatter(mult_euro_val, "IgG 1", "IgG 2",
"Replicate 1", "Replicate 2",
cutoff1=cutoff$old$Eur_IgG, cutoff2=cutoff$old$Eur_IgG,
stat_x=-1.5, stat_y=0.6,
ann_x=c(0.5, 2, 0.5, 2), ann_y=c(0.08, 0.08, 10, 10),
title="EI-S1-IgG replicates")
# Correlation plot
corr = Hmisc::rcorr(as.matrix(log10(mult_euro_val)), type="pearson")
ggcorrplot::ggcorrplot(
corr$r, type="lower", show.diag=T, lab=T,
legend.title="", title="Pearson correlations")

# Merge plots
plt_rep_eur = ggpubr::ggarrange(plt_iga, plt_igg, nrow=1)
## Warning: Removed 6467 rows containing non-finite values (stat_smooth).
## Warning: Removed 6467 rows containing non-finite values (stat_cor).
## Warning: Removed 6467 rows containing missing values (geom_point).
## Warning: Removed 6532 rows containing non-finite values (stat_smooth).
## Warning: Removed 6532 rows containing non-finite values (stat_cor).
## Warning: Removed 6532 rows containing missing values (geom_point).
plt_rep_eur

# Save plot
for (fmt in style$output_formats) {
#ggsave(here_out(paste0("Replicates_Euroimmun_Scatter.", fmt)),
# device=fmt, dpi=style$dpi, width=7, height=3.5, units="in")
}
# Tidy up
rm(plt_iga, plt_igg, corr, fmt)
rbind(
qualitative_2x2_table(mult_euro_val, "IgA 1", "IgA 2", cutoff$old$Eur_IgA,
cutoff$old$Eur_IgA, label="IgA 1/2, Old threshold"),
qualitative_2x2_table(mult_euro_val, "IgA 1", "IgA 2", cutoff$new$Eur_IgA,
cutoff$new$Eur_IgA, label="IgA 1/2, New threshold"),
qualitative_2x2_table(mult_euro_val, "IgG 1", "IgG 2", cutoff$old$Eur_IgG,
cutoff$old$Eur_IgG, label="IgG 1/2, Old threshold"),
qualitative_2x2_table(mult_euro_val, "IgG 1", "IgG 2", cutoff$new$Eur_IgG,
cutoff$new$Eur_IgG, label="IgG 1/2, New threshold")
)
## label pp pn np nn
## 1 IgA 1/2, Old threshold 34 19 3 142
## 2 IgA 1/2, New threshold 34 19 3 142
## 3 IgG 1/2, Old threshold 37 2 1 93
## 4 IgG 1/2, New threshold 37 2 1 93
CAPTION: Scatter plots of two Euroimmun S1 IgA and Euroimmun S1 IgG measurement replicates, for the same blood sample. The blue lines indicate linear regression lines with standard errors. Pearson correlation coefficients R of the log-scaled values are also indicated in the plot. Dashed lines indicate manufacturer decision thresholds (value of 1.1 for both IgA and IgG). The numbers n indicate the numbers of points in the respective quadrants.
The correlations are good, in particular for IgG, while IgA shows more variability. IgA-IgG correlations are lower, as expected, as these measure two different features.
The scatter plots allow to assess how far off the diagonal the values are.
All integrated in a single plot:
#PerformanceAnalytics::chart.Correlation(log10(mult_euro_val), histogram=T)
Having looked so far at the quantitative results, next, we create contingency tables of the qualitative results. For simple visualization of the agreement, we use a bar plot of the percentages.
Here we use the old cut-off, as we want to compare the values of the manufacturer directly.
# Create categorical result data frame
data = mult_euro_val
iga_cols = grep("IgA", colnames(data), value=T)
igg_cols = grep("IgG", colnames(data), value=T)
data[,iga_cols] = ifelse(data[,iga_cols]>=cutoff$old$Eur_IgA,
"Positive", "Negative")
data[,igg_cols] = ifelse(data[,igg_cols]>=cutoff$old$Eur_IgG,
"Positive", "Negative")
# Create contingency matrices in data frame (separate IgA and IgG)
contis = create_contingency_matrix(data, classifiers=c("IgA", "IgG"))
# To long format
contis_long = reshape2::melt(contis, id.vars=c("Methods", "total"),
value.name="Percentage")
colnames(contis_long)[colnames(contis_long) == "variable"] = "Outcomes"
# Prettify
contis_long$Methods = stringr::str_replace_all(
contis_long$Methods, c(" & "=" \n& ", "1"="Rep. 1", "2"="Rep. 2"))
# Remember
contis_long_euroimmun = contis_long
ggplot(contis_long,
aes(fill=Outcomes, color=Outcomes, x=Methods, y=Percentage)) +
geom_bar(position="fill", stat="identity", alpha=0.5) +
geom_text(data=contis_long[!duplicated(contis_long$Methods),],
aes(x=Methods, y=1.05, label=total), color="black") +
scale_y_continuous(labels=scales::percent) +
scale_fill_manual(values=style$pal4, ) +
scale_color_manual(values=style$pal4, )

# Save plot
for (fmt in style$output_formats) {
#ggsave(here_out(paste0("Replicates_Euroimmun_Bar.", fmt)),
# device=fmt, dpi=style$dpi, width=4, height=3.5, units="in")
}
rm(data, iga_cols, igg_cols, contis, contis_long, fmt)
CAPTION: Qualitative agreement of the outcome of Euroimmun S1 IgA and Euroimmun S1 IgG measurement replicates.
We see that for IgA there are batch effects.
Ro-N-Ig
Now, we repeat basically the same for Roche. Here, we have in total 2 machines, each with up to 4 measurements (but a lot of sparsity). Prepare data:
mult_roche = single_data[,c("machine_1", "machine_2",
grep("roche_COI", colnames(single_data), value=T))]
old_machine_cols =
c("roche_COI_1_1", "roche_COI_2_1", "roche_COI_3_1", "roche_COI_4_1")
new_machine_cols =
c("roche_COI_1_2", "roche_COI_2_2", "roche_COI_3_2", "roche_COI_4_2")
if (nrow(mult_roche[mult_roche$machine_1=="new" &
!is.na(mult_roche$machine_2),]) > 0) {
stop("The Roche machine assignment is wrong.")
}
# move values where machine 1 is "new" to the second set of entries
mult_roche[mult_roche$machine_1=="new" &
!is.na(mult_roche$machine_1), new_machine_cols] =
mult_roche[mult_roche$machine_1=="new" &
!is.na(mult_roche$machine_1), old_machine_cols]
# and remove old entries
mult_roche[mult_roche$machine_1=="new" &
!is.na(mult_roche$machine_1), old_machine_cols] = NA
# Add columns aggregating machine 1 and machine 2
df = mult_roche
mult_roche$M2 =
ifelse(!is.na(df$roche_COI_4_2), df$roche_COI_4_2,
ifelse(!is.na(df$roche_COI_3_2), df$roche_COI_3_2,
ifelse(!is.na(df$roche_COI_2_2), df$roche_COI_2_2,
df$roche_COI_1_2)))
mult_roche$M1 =
ifelse(!is.na(df$roche_COI_4_1), df$roche_COI_4_1,
ifelse(!is.na(df$roche_COI_3_1), df$roche_COI_3_1,
ifelse(!is.na(df$roche_COI_2_1), df$roche_COI_2_1,
df$roche_COI_1_1)))
# Add columns aggregating replicates
mult_roche$Rep1 =
ifelse(!is.na(df$roche_COI_1_2), df$roche_COI_1_2, df$roche_COI_1_1)
mult_roche$Rep2 =
ifelse(!is.na(df$roche_COI_2_2), df$roche_COI_2_2, df$roche_COI_2_1)
mult_roche$Rep3 =
ifelse(!is.na(df$roche_COI_3_2), df$roche_COI_3_2, df$roche_COI_3_1)
mult_roche$Rep4 =
ifelse(!is.na(df$roche_COI_4_2), df$roche_COI_4_2, df$roche_COI_4_1)
# Simplify column names
rep_cols = paste(
"Roche", substr(grep("COI", colnames(mult_roche), value=T), 11, 13))
colnames(mult_roche)[grep("COI", colnames(mult_roche))] = rep_cols
# How many values are there for the different replicates?
for (col in rep_cols) {
cat(col, sum(!is.na(mult_roche[,col])), "\n")
}
## Roche 1_1 2661
## Roche 2_1 379
## Roche 3_1 3
## Roche 4_1 0
## Roche 1_2 4066
## Roche 2_2 46
## Roche 3_2 3
## Roche 4_2 1
# intersections (rep. 3 and 4 have too few entries)
rep_cols = paste("Roche", c("1_1", "2_1", "1_2", "2_2"))
for (i in 1:(length(rep_cols)-1)) {
for (j in (i+1):length(rep_cols)) {
cat(rep_cols[i], "&", rep_cols[j],
sum(!is.na(mult_roche[,rep_cols[i]]) &
!is.na(mult_roche[,rep_cols[j]])), "\n")
}
}
## Roche 1_1 & Roche 2_1 379
## Roche 1_1 & Roche 1_2 94
## Roche 1_1 & Roche 2_2 6
## Roche 2_1 & Roche 1_2 2
## Roche 2_1 & Roche 2_2 0
## Roche 1_2 & Roche 2_2 44
cat("Roche M1 & M2",
sum(!is.na(mult_roche[,"M1"]) & !is.na(mult_roche[,"M2"])), "\n")
## Roche M1 & M2 94
cat("Roche Rep1 & Rep2",
sum(!is.na(mult_roche[,"Rep1"]) & !is.na(mult_roche[,"Rep2"])), "\n")
## Roche Rep1 & Rep2 423
rm(col, rep_cols, i, j, df, old_machine_cols, new_machine_cols)
Scatter plots for single replicates
# Scatter plots
plt_11_21 = scatter(mult_roche, "Roche 1_1", "Roche 2_1",
name1="Machine 1, Replicate 1",
name2="Machine 1, Replicate 2",
cutoff1=cutoff$old$Roche, cutoff2=cutoff$old$Roche,
stat_x=0.1, stat_y=-0.5,
ann_x=c(0.3, 3, 0.3, 3), ann_y=c(0.08, 0.08, 50, 50))
plt_11_12 = scatter(mult_roche, "Roche 1_1", "Roche 1_2",
name1="Machine 1, Replicate 1",
name2="Machine 2, Replicate 1",
cutoff1=cutoff$old$Roche, cutoff2=cutoff$old$Roche,
stat_x=0.1, stat_y=-0.5,
ann_x=c(0.3, 3, 0.3, 3), ann_y=c(0.08, 0.08, 50, 50))
plt_12_22 = scatter(mult_roche, "Roche 1_2", "Roche 2_2",
name1="Machine 2, Replicate 1",
name2="Machine 2, Replicate 2",
cutoff1=cutoff$old$Roche, cutoff2=cutoff$old$Roche,
stat_x=0.1, stat_y=-0.5,
ann_x=c(0.3, 3, 0.3, 3), ann_y=c(0.08, 0.08, 50, 50))
# Merge plots
plt = ggpubr::ggarrange(plt_11_21, plt_11_12, plt_12_22, nrow=1)
## Warning: Removed 6286 rows containing non-finite values (stat_smooth).
## Warning: Removed 6286 rows containing non-finite values (stat_cor).
## Warning: Removed 6286 rows containing missing values (geom_point).
## Warning: Removed 6571 rows containing non-finite values (stat_smooth).
## Warning: Removed 6571 rows containing non-finite values (stat_cor).
## Warning: Removed 6571 rows containing missing values (geom_point).
## Warning: Removed 6621 rows containing non-finite values (stat_smooth).
## Warning: Removed 6621 rows containing non-finite values (stat_cor).
## Warning: Removed 6621 rows containing missing values (geom_point).
plt

# Save plot
for (fmt in style$output_formats) {
#ggsave(here_out(paste0("Replicates_Roche_Scatter_Single.", fmt)),
# device=fmt, dpi=style$dpi, width=10.5, height=3.5, units="in")
}
# Tidy up
rm(plt_11_21, plt_11_12, plt_12_22, plt, fmt)
CAPTION: Scatter plots of Roche N IgG measurement replicates, either on the same machine (left), or across machines (right). The repeated measurements were performed using the same blood sample. The blue lines indicate linear regression lines with standard errors. Pearson correlation coefficients R and associated p-value of the log-scaled values are also indicated in the plot. Dashed lines indicate manufacturer decision thresholds (value of 1.0). The numbers n indicate the numbers of points in the respective quadrants.
Scatter plots condensed to Machine / Replicate comparison
# Scatter plots
plt_m = scatter(mult_roche, "M1", "M2",
name1="Machine 1", name2="Machine 2",
title="Ro-N-Ig machines",
cutoff1=cutoff$old$Roche, cutoff2=cutoff$old$Roche,
stat_x=0.1, stat_y=-0.5,
ann_x=c(0.3, 3, 0.3, 3), ann_y=c(0.08, 0.08, 50, 50))
plt_r = scatter(mult_roche, "Rep1", "Rep2",
name1="Replicate 1", name2="Replicate 2",
title="Ro-N-Ig: replicates",
cutoff1=cutoff$old$Roche, cutoff2=cutoff$old$Roche,
stat_x=0.1, stat_y=-0.5,
ann_x=c(0.3, 3, 0.3, 3), ann_y=c(0.08, 0.08, 50, 50))
plt_rep_roche = ggpubr::ggarrange(plt_m, plt_r, nrow=1)
## Warning: Removed 6571 rows containing non-finite values (stat_smooth).
## Warning: Removed 6571 rows containing non-finite values (stat_cor).
## Warning: Removed 6571 rows containing missing values (geom_point).
## Warning: Removed 6242 rows containing non-finite values (stat_smooth).
## Warning: Removed 6242 rows containing non-finite values (stat_cor).
## Warning: Removed 6242 rows containing missing values (geom_point).
plt_rep_roche

# Save plot
for (fmt in style$output_formats) {
#ggsave(here_out(paste0("Replicates_Roche_Scatter.", fmt)),
# device=fmt, dpi=style$dpi, width=7, height=3.5, units="in")
}
rm(plt_m, plt_r, fmt)
rbind(
qualitative_2x2_table(mult_roche, "Roche 1_1", "Roche 2_1", cutoff$old$Roche,
cutoff$old$Roche, label="Roche 1_1/2_1, Old threshold"),
qualitative_2x2_table(mult_roche, "Roche 1_1", "Roche 2_1", cutoff$new$Roche,
cutoff$new$Roche, label="Roche 1_1/2_1, New threshold"),
qualitative_2x2_table(mult_roche, "Roche 1_1", "Roche 1_2", cutoff$old$Roche,
cutoff$old$Roche, label="Roche 1_1/1_2, Old threshold"),
qualitative_2x2_table(mult_roche, "Roche 1_1", "Roche 1_2", cutoff$new$Roche,
cutoff$new$Roche, label="Roche 1_1/1_2, New threshold")
)
## label pp pn np nn
## 1 Roche 1_1/2_1, Old threshold 0 0 0 379
## 2 Roche 1_1/2_1, New threshold 0 0 0 379
## 3 Roche 1_1/1_2, Old threshold 85 0 0 9
## 4 Roche 1_1/1_2, New threshold 86 0 1 7
Bar plot for single replicates
# Create categorical result data frame
data_res = mult_roche[paste("Roche", c("1_1", "2_1", "1_2", "2_2"))]
data_res = ifelse(data_res>=cutoff$old$Roche, "Positive", "Negative")
# Create contingency matrices in data frame
contis = create_contingency_matrix(data_res)
# To long format
contis_long = reshape2::melt(contis, id.vars=c("Methods", "total"),
value.name="Percentage")
colnames(contis_long)[colnames(contis_long) == 'variable'] = "Outcomes"
# Remove 1_2 vs 2_1 which has only 2 entries
contis_long = contis_long[
contis_long$Methods %in%c("Roche 1_1 & Roche 1_2", "Roche 1_1 & Roche 2_1",
"Roche 1_2 & Roche 2_2"),]
# Prettify Method names
contis_long$Methods = stringr::str_replace_all(
contis_long$Methods,
c(" & "="\n& ", "Roche 1_1"="M. 1 Rep. 1", "Roche 1_2"="M. 2 Rep. 1",
"Roche 2_1"="M. 1 Rep. 2", "Roche 2_2"="M. 2 Rep. 2"))
# Remember
contis_long_roche = contis_long
ggplot(contis_long, aes(fill=Outcomes, color=Outcomes,
x=Methods, y=Percentage)) +
geom_bar(position="fill", stat="identity", alpha=0.5) +
scale_y_continuous(labels=scales::percent) +
geom_text(data=contis_long[!duplicated(contis_long$Methods),],
aes(x=Methods, y=1.05, label=total), color="black") +
scale_fill_manual(values=style$pal4) + scale_color_manual(values=style$pal4)

# Save plot
for (fmt in style$output_formats) {
#ggsave(here_out(paste0("Replicates_Roche_Bar_Single.", fmt)),
#device=fmt, dpi=style$dpi, width=4, height=3.5, units="in")
}
rm(data_res, contis, contis_long, fmt)
CAPTION: Qualitative agreement of the outcome of Roche N measurement replicates. The manufacturer decision thresholds were used to decide whether a test is positive or negative.